// Open the log for this step, overwriting any earlier log file of the same name.
log using "$logs/predict_census.log", replace

// Load the cleaned census file, discarding whatever is currently in memory.
use "$temp/clean_census_5pct", clear

// Switch to the shorter variable names used throughout the rest of the script.
// Old and new names are paired by position in the two lists.
rename (edl  exp  ind1 statefip perwt  occ_det) ///
       (educ expr ind  state    weight occ_detail)

// Drop every observation with a missing value in any variable used below.
//  - missing() is true for system missing (.) and for extended missing values
//    (.a-.z); a plain "==." comparison only catches the former.
//  - race is screened as well: it enters the regression and defines the output
//    cells, so rows with missing race would otherwise end up as cells without a
//    usable prediction.
//  - One call with all variables needs a single pass over the data instead of
//    one pass per variable.
drop if missing(lnwage, educ, expr, race, state, weight, occ_major, occ_minor, occ_detail, female, ind)

// Create a "0"-suffixed copy of each categorical variable. The regression below
// uses the copies for main effects and the originals inside the state
// interactions, so a copy can later be overwritten with a reference value
// without touching the interaction terms.
local main_effect_vars race educ occ_major occ_minor state
foreach src of local main_effect_vars {
    generate `src'0 = `src'
}

// Show the range of the categorical variables, first for female==0, then female==1.
forvalues sex = 0/1 {
    summarize educ race state occ_major occ_minor occ_detail ind if female == `sex'
}

// Store the levels that lie strictly above the reference values (11 for major
// occupation, 111 for minor occupation, 1 for state). These lists select the
// levels that enter the factor-variable interactions in the regressions.
levelsof occ_major if occ_major > 11,  local(occ_majors)
levelsof occ_minor if occ_minor > 111, local(occ_minors)
levelsof state     if state > 1,       local(states)


/*

// first specification: major occupation * state not controlling for detailed occupation
//_________________________________________________________________________________________
forval f=0/1{
    
preserve

    keep if female==`f'

    // run regression
    reg lnwage (i.race0 i.educ0)##c.expr c.expr#c.expr i.ind i.occ_major0 i.state0 (i(1/4).educ i(1/1).race i(`occ_majors').occ_major)#i(`states').state [fw=weight]

    // remove mean differences by education, race, experience, and industry
    replace race0=0
    replace educ0=0
    replace expr=0
    replace ind=1
    predict predicted_lnwage_mean_occ_state

    // remove mean differences by occupation, but not state
    replace occ_major0=11
    predict predicted_lnwage_mean_state

    // remove mean differences by state, but not occupation
    replace occ_major0=occ_major
    replace state0=1
    predict predicted_lnwage_mean_occ

    // remove mean differences by occupation and state
    replace occ_major0=11
    predict predicted_lnwage

    bys female educ race occ_major state: gen n_obs=_N
    keep predicted* female educ race occ_major state n_obs
    
    duplicates drop
    isid female educ race occ_major state
    

    save "$temp/predicted_lnwage1_female`f'", replace
	

    restore
}




// second specification: major occupation * state controlling for detailed occupation
//_________________________________________________________________________________________
forval f=0/1{
    
preserve

    keep if female==`f'

    // run regression
    reg lnwage (i.race0 i.educ0)##c.expr c.expr#c.expr i.ind i.occ_detail i.state0 (i(1/4).educ i(1/1).race i(`occ_majors').occ_major)#i(`states').state [fw=weight]

    // remove mean differences by education, race, experience, and industry
    replace race0=0
    replace educ0=0
    replace expr=0
    replace ind=1

    // also remove mean differences by occupation, but not state
    replace occ_detail=2
    predict predicted_lnwage_mean_state

    // remove mean differences by occupation and state
    replace state0=1    
    predict predicted_lnwage

    bys female educ race occ_major state: gen n_obs=_N
    keep predicted* female educ race occ_major state n_obs
    
    duplicates drop
    isid female educ race occ_major state
    

    save "$temp/predicted_lnwage2_female`f'", replace
	
    restore
}


*/


// third specification: minor occupation * state not controlling for detailed occupation
//_________________________________________________________________________________________
// Runs once for female==0 and once for female==1. Each pass fits the wage
// regression on that subsample, produces four sets of predictions with different
// main effects switched to their reference values, collapses the data to one row
// per (female, educ, race, occ_minor, state) cell and saves it to a temporary file.
forvalues f = 0/1 {

    // keep a copy of the full sample; restore brings it back for the next pass
    preserve

    keep if female == `f'

    // reference levels that the "0" copies are reset to before predicting
    local ref_occ   111
    local ref_state 1

    // Regression: main effects come from the "0" copies (plus industry and the
    // experience terms); the state interactions use the original variables.
    regress lnwage (i.race0 i.educ0)##c.expr c.expr#c.expr ///
        i.ind i.occ_minor0 i.state0 ///
        (i(1/4).educ i(1/1).race i(`occ_minors').occ_minor)#i(`states').state ///
        [fw=weight]

    // Step 1: switch off differences by education, race, experience and industry.
    // The occupation and state main effects are still at their actual values.
    replace race0 = 0
    replace educ0 = 0
    replace expr  = 0
    replace ind   = 1
    predict predicted_lnwage_mean_occ_state

    // Step 2: additionally put the occupation main effect at its reference level;
    // the state main effect stays.
    replace occ_minor0 = `ref_occ'
    predict predicted_lnwage_mean_state

    // Step 3: bring the occupation main effect back and put the state main
    // effect at its reference level instead.
    replace occ_minor0 = occ_minor
    replace state0     = `ref_state'
    predict predicted_lnwage_mean_occ

    // Step 4: both occupation and state main effects at their reference levels.
    replace occ_minor0 = `ref_occ'
    predict predicted_lnwage

    // number of observations (rows, not weighted) in each cell
    bysort female educ race occ_minor state: generate n_obs = _N
    keep predicted* female educ race occ_minor state n_obs

    // reduce to one row per cell and verify that the cell key is unique
    duplicates drop
    isid female educ race occ_minor state

    save "$temp/predicted_lnwage3_female`f'", replace

    restore
}



// Stack the two sex-specific files of each specification into one data set,
// save it under $data_der, and delete the two intermediate files.
// Only specification 3 is listed because specifications 1 and 2 are disabled above.
foreach j of numlist 3 {
    clear
    append using "$temp/predicted_lnwage`j'_female0" "$temp/predicted_lnwage`j'_female1"
    save "$data_der/predicted_lnwage`j'", replace

    // the per-sex files are no longer needed once the combined file is saved
    erase "$temp/predicted_lnwage`j'_female0.dta"
    erase "$temp/predicted_lnwage`j'_female1.dta"
}

log close


// Remove temporary data sets from $temp; this happens after the log is closed,
// so the deletions are not recorded in the log file.
foreach stale in clean_census_5pct census_raw {
    erase "$temp/`stale'.dta"
}